/* * Copyright 2013 SciFY NPO <info@scify.org>. * * This product is part of the NewSum Free Software. * For more information about NewSum visit * * http://www.scify.gr/site/en/our-projects/completed-projects/newsum-menu-en * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * If this code or its output is used, extended, re-engineered, integrated, * or embedded to any extent in another software or hardware, there MUST be * an explicit attribution to this work in the resulting source code, * the packaging (where such packaging exists), or user interface * (where such an interface exists). * The attribution must be of the form "Powered by NewSum, SciFY" */ package org.scify.NewSumServer.Server.Structures; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import org.jsoup.Jsoup; import org.jsoup.safety.Whitelist; import org.scify.NewSumServer.Server.Utils.Utilities; /** * * Describes an article fetched from a URL feed. * Use simple methods to set or to get values * @author ggianna * @author George K. <gkiom@scify.org> * */ public class Article implements java.io.Serializable { /** * The Source URL that the article is derived from. */ protected String Source; /** * The article. The RSS description of the article fetched. */ protected String Text; /** * The Title of the Article. The title fetched from the RSS feed. */ protected String Title; /** * The Category that this Article belongs to. E.g. Sports, Top News, etc. */ protected String Category; /** * The exact URL to the feed where the article was found at. */ protected String Feed; /** * The date of the Article, in string format. */ protected String sdate; /** * The date that the Article was created. * Most of the times, it is fetched from the Feed Provider. * Otherwise (if the feed does not provide the article date), it * is the date the article was retrieved */ protected Calendar date; /** * set True if this article is to be accessed by the classification trainer */ protected Boolean toWrap; /** * The Constructor of the Article Class. Initializes a new Article Object, * with the below parameters. * @param Source The source containing the article (ie the permalink) * @param Title The title of the article * @param Text The description of the article * @param Category The category that the article belongs to * @param Feed The feed that the article came from * @param toWrap If true, the article's category will used to train the * machine learning algorithm */ public Article(String Source, String Title, String Text, String Category, String Feed, Boolean toWrap) { this.Source = Source; if (Text != null) { this.Text = cleanUp(Text.trim()); } else { this.Text =""; } this.Title = cleanUp(Title); this.Category = Category; this.Feed = Feed; this.toWrap = toWrap; } /** * * @return the (String) permalink that contains the article. */ public String getSource() { return Source; } /** * * @return the (String) description of the Article */ public String getText() { return Text; } /** * * @return the (String) Title of the Article */ public String getTitle() { return Title; } /** * * @return The category that the article belongs to */ public String getCategory() { return Category; } /** * * @return The Feed link that the Article came from */ public String getFeed() { return Feed; } /** * Sets the source that contains the article * @param Source The Source containing the Article */ public void setSource(String Source) { this.Source = Source; } /** * Sets the description of the Article (the Article Body) * @param Text the Description of the Article */ public void setText(String Text) { if (Text != null) { this.Text = cleanUp(Text); } else { this.Text = ""; } } /** * Sets the title of the Article * @param Title The Title of the article */ public void setTitle(String Title) { this.Title = cleanUp(Title); } /** * Sets the Category that the article belongs to * @param Category The category that the article belongs to */ public void setCategory(String Category) { this.Category = Category; } /** * * @param Cal the calendar to set */ public void setDate(Calendar Cal) { this.date = Cal; SimpleDateFormat df = new SimpleDateFormat(); df.applyPattern("dd.MM.yyyy - HH:mm:ss z"); this.sdate = df.format(date.getTime()); } /** * Sets the Date that the article was created, in Calendar format * @param date The Date the article was created */ public void setDate(Date date) { this.date = Utilities.convertDateToCalendar(date); this.sdate = date.toString(); } /** * * @return The date the article was created in string representation */ public String getDatetoString() { SimpleDateFormat df = new SimpleDateFormat(); df.applyPattern("dd.MM.yyyy - HH:mm:ss z"); return df.format(this.date.getTime()); } /** * * @return The date the article was created */ public Calendar getDate() { return this.date; } /** * * @return true if the Article will be used to train the * classification package */ public boolean getToWrap() { return this.toWrap; } @Override public String toString() { return Title + "\n" + Text; } /** * Cleans up extra whitespace from the given text * @param sText the Text to cleanup * @return the text without any extra whitespace */ private String cleanUp(String sText) { if (sText != null) { sText = Jsoup.clean(sText, Whitelist.none()); sText = sText.replaceAll("«|»", ""); sText = sText.replaceAll(""", ""); sText = sText.replaceAll(" ", ""); sText = sText.replaceAll(">", ""); sText = sText.replaceAll("&[lr]aquo;", ""); return sText; } else { return ""; } } }